{
"cells": [
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# requirements:\n",
"\n",
"%pip install -q scikit-dimension pandas numba matplotlib"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"data shape: (32, 100)\n",
"estimated IDs (DANCo, lPCA): 6.6024110947192325 5.0\n"
]
}
],
"source": [
"import skdim\n",
"import numpy as np\n",
"\n",
"# Seed NumPy's global RNG so the np.random.rand noise added below is the same on every\n",
"# Restart & Run All.\n",
"# NOTE(review): skdim calls left at their default random_state presumably draw from this\n",
"# global RNG as well -- confirm against the scikit-dimension docs.\n",
"SEED = 0\n",
"np.random.seed(SEED)\n",
"\n",
"# generate data: np.array (n_rows x n_features). Here a 5-ball of radius 2 sampled with\n",
"# skdim.datasets.hyperBall and stored in the first n_components columns of a\n",
"# 100-dimensional array; every other column starts out as zeros.\n",
"n_rows = 32\n",
"n_features = 100\n",
"n_components = 5\n",
"\n",
"data = np.zeros((n_rows, n_features))\n",
"\n",
"data[:,:n_components] = skdim.datasets.hyperBall(n=n_rows, d=n_components, radius=2)\n",
"\n",
"# add some correlation: columns 8, 12 and 20 are ball coordinates 1, 1 and 2\n",
"# plus uniform [0, 1) noise\n",
"data[:,8] = data[:, 1] + np.random.rand(n_rows)\n",
"data[:,12] = data[:, 1] + np.random.rand(n_rows)\n",
"data[:,20] = data[:, 2] + np.random.rand(n_rows)\n",
"\n",
"print('data shape:', data.shape)\n",
"\n",
"# estimate global intrinsic dimension\n",
"danco = skdim.id.DANCo().fit(data)\n",
"\n",
"# estimate local (pointwise) intrinsic dimension in neighborhoods around each point.\n",
"# n_neighbors = n_rows - 1, so each neighborhood is asked to contain all other points.\n",
"lpca = skdim.id.lPCA().fit_pw(data,\n",
" n_neighbors=n_rows-1,\n",
" n_jobs=1)\n",
"\n",
"# get estimated intrinsic dimension (lPCA value is the mean of the pointwise estimates)\n",
"print('estimated IDs (DANCo, lPCA):', danco.dimension_, np.mean(lpca.dimension_pw_))"
]
}
],
"metadata": {
"interpreter": {
"hash": "4d4c55ad0dd25f9ca95e4d49a929aa3f71bfb37020ae570a9996c3e164818202"
},
"kernelspec": {
"display_name": "Python 3.9.9 ('py3')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}